* Start from an empty session: remove data, programs, stored results, etc. from memory
clear all
* Do not pause output with --more-- prompts, so the script can run unattended
set more off
* Raise the limit on the number of variables a dataset may hold
set maxvar 10000


*---------------------------*
* PART 1: CLEAN CENSUS 
*---------------------------*

    * For each Census decade 1920-1980, build a file describing the occupations
    * of black and white fathers aged 30-50 and store it in the tempfile
    * `census19X0_fathers30_50'. The 1940 file is read from a pre-built shares
    * dataset; every other decade is derived from the raw 1% extract.
    foreach d of numlist 2/8 {

        local yr = 1900 + 10*`d'   // Census year handled in this pass

        if `d'==4 {

            /* Accountants are not included in the publicly available
               1940 IPUMS sample, so the full 1940 sample was downloaded
               from the NBER server. The file "fatherocc_shares_1940census.dta"
               is already restricted to white and black fathers aged 30-50. */
            use "$CensusData/output/fatherocc_shares_1940census.dta", clear

            sort occ1950ej

            * Occupation index used to address table rows
            egen groupnum = group(occ1950ej)

            keep occ1950ej share groupnum
            compress
            tempfile census1940_fathers30_50
            save `census1940_fathers30_50', replace

        }

        else {

            *--- Step 1: list of father identifiers ---------------------------
            use "$CensusData/input/Census_1910to2010_1pct_raw.dta", clear
            keep if year==`yr'

            * Children under 18 who have a father present in the household
            * (poploc is 0 or missing when no father is recorded)
            keep if age<18
            keep serial poploc
            drop if poploc==0 | poploc==.

            * One row per father: a father with several children appears once
            bysort serial poploc: keep if _n==1
            rename poploc pernum

            tempfile children
            save `children'

            *--- Step 2: person records of those fathers ----------------------
            use "$CensusData/input/Census_1910to2010_1pct_raw.dta", clear
            keep if year==`yr'
            merge 1:1 serial pernum using `children', keep(match)
            drop _merge

            * Black and white fathers aged 30 to 50
            keep if inrange(age,30,50) & inlist(race,1,2)

            *--- Step 3: crosswalk Census occupations to ANES occupations -----
            sort occ1950
            replace occ1950 = . if occ1950>=980

            * Shift the self-employed in the 200s codes by 1000 so they can be
            * told apart from employees after the crosswalk
            replace occ1950 = occ1950 + 1000 if inrange(occ1950,200,290) & classwkr==1

            merge m:1 occ1950 using "$Crosswalks/Crosswalk_1950Census_toANES.dta"
            tab occ1950ej if _merge==1, m
            tab occ1950ej if _merge==2, m
            drop if _merge==2
            drop _merge

            * Fix self-employment assignment for the shifted codes
            replace occ1950ej = 21 if inrange(occ1950,1200,1300)
            rename occ1950ej fatheroccej

            *--- Step 4: occupation index used to address table rows ----------
            sort fatheroccej
            egen groupnum = group(fatheroccej)
            tab groupnum, m

            *--- Step 5: rescale the Census person weight to have mean 1 ------
            summarize perwt
            local mean_perwt = r(mean)
            gen wgt_sex_race = perwt/`mean_perwt'
            summarize wgt_sex_race

            *--- Step 6: save the cleaned Census file -------------------------
            keep year serial pernum perwt hhwt age sex race classwkr classwkrd fatheroccej groupnum wgt_sex_race
            order year serial pernum
            sort serial pernum
            duplicates list serial pernum
            compress
            tempfile census19`d'0_fathers30_50
            save `census19`d'0_fathers30_50', replace

        }

    }

*------------------------------------------------------*
* PART 2: PREPARE SURVEY DATA
*------------------------------------------------------*

	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear

    * Restrict the pooled surveys to the baseline sample
    drop if baseline_sample!=1
    sort fatheroccej

    * Attach the full list of coarsened father occupations and their labels.
    * Rows that exist only in the definitions file are kept.
    merge m:1 fatheroccej using "$SupplementaryData/Occ1950ej_definitions.dta"
    drop _merge

    * Occupation index used to address table rows
    egen groupnum = group(fatheroccej)
    tabulate groupnum, missing

    * Park the prepared survey data for the table-building step
    tempfile surveys
    save `surveys', replace

*------------------------------------------------------*
* PART 3: MAKE TABLE (EARLIER CENSUS VERSION)
*------------------------------------------------------*

    * Writes one LaTeX table row per occupation group to
    * rb2_earliercensus_stata2tex.tex. Each row holds the occupation label and,
    * for each Census decade 1920-1970, two cells: the Census share of fathers
    * in that occupation and the share of survey respondents reporting it.

    * If an earlier run aborted before reaching "file close", the handle is
    * still open and "file open" would fail; close it first (capture ignores
    * the error raised when the handle does not exist).
    capture file close rb2_earliercensus
    file open rb2_earliercensus using "$Mydirectory2/appendix_c/rb2_earliercensus_stata2tex.tex", write replace

    local n_occ 28   // number of occupation groups (table rows)

    foreach o of numlist 1/`n_occ' {

        * Row label: name of occupation group `o'
        use `surveys', clear
        levelsof fatheroccej_name if groupnum==`o', local(occlabel) clean //Note: "Clean" option takes away compound double quotes.

        file write rb2_earliercensus "`occlabel'"

        foreach d of numlist 2/7 {

        /*Find share of Census fathers in the given decade
          who reported a particular occupation */
            use `census19`d'0_fathers30_50', clear

            /*Notes:  (1) Shares are already *100 in fatherocc_shares_1940census.dta.
                      (2) No need to weight 1940 shares because 1940 is fully
                          representative of the population. (And perwt is in fact
                          "1" for all respondents in the 1940 Census file.) */
            if `d'==4 {
                summ share if groupnum==`o'
                local share_c = `r(mean)'
            }
            else {
                gen occ`o' = (groupnum==`o')
                tab occ`o', m

                summ occ`o' [aw=wgt_sex_race]
                local share_c = `r(mean)'*100
            }

            local share_c2: display %-09.2fc `share_c'
            file write rb2_earliercensus "& `share_c2'"

        /*Find share of survey respondents who retrospectively
          reported each father occupation. Look at the
          answers of respondents from birth cohorts that would
          have made them between 1-10 years old at the time of
          the given Census. */
            use `surveys', clear

            * The matching survey "decade" value is ten years before the
            * Census year: d=2 -> 1910, ..., d=7 -> 1960
            local bcohort = 1900 + 10*(`d'-1)
            keep if decade==`bcohort'

            count

            if `d'==7 & `o'==24 {
                * no private hh workers (fatheroccej==65) in 1960 bcohorts
                file write rb2_earliercensus "& ---"
            }
            else {
                gen occ`o' = (groupnum==`o')
                tab occ`o', m

                summ occ`o' [aw=wgt_sex_race]
                local share_s = `r(mean)'*100
                local share_s2: display %-09.2fc `share_s'

                file write rb2_earliercensus "& `share_s2'"
            }
        }

        * End the row; add vertical spacing after every row except the last
        file write rb2_earliercensus " \\" _n

        if `o'!=`n_occ' file write rb2_earliercensus "\addlinespace[0.25ex]" _n

    }

    file close rb2_earliercensus